********************************************************************************
* Code for linking schools to nearest wind monitors, calculating downwind value for
* schools, getting annual averages (by hour) of the downwind, windtreat and circtreat
* variables. 

********************************************************************************

clear all 
set more off
set maxvar 32767

local main "C:\Users\das13016\Dropbox\Research and Referee work\papers\inprocess\Pollution\Research on Florida Wind Patters"


*********************
scalar prefdist = 5 	// how many miles between school and wind monitor?
********************* 
scalar mitoroad = 1 // how many miles between school and road?
********************* 
local yy = 2010



********************************************************************************
// 1. Create a data set with uniqueID, school lon and lat
********************************************************************************

foreach sample in aadt25k mjrrds {

cd "`main'\roadsetup\samples\"

use "schooltomajorrds_`sample'_5closest", clear // EDITED BY JAH 5/14

merge 1:1 ncessch using "`main'\School wind pollution\School wind road\NCES_2010" // EDITED BY JAH 5/14
tab _merge
drop if _merge==2

keep if mi_to_nid1<=mitoroad // mitoroad defined on line 19
keep uniqueID ncessch schnam latcod loncod
rename (schnam latcod loncod) (schoolname latitude longitude)
gen id = 1
tempfile school
save `school', replace

********************************************************************************
// 2.  Link schools to nearest wind monitors used in the final first stage data set
// winddir_dropmonitor dropped all monitors with >5% zero values (see wnp_FL_lax_dropm.do)
********************************************************************************
use "`main'\School wind pollution\School wind road\winddir_dropmonitor", clear  // EDITED BY JAH 5/14
levelsof station, loc(stationID) clean
scalar nwind = wordcount("`stationID'")
tempfile winddir
save `winddir', replace
keep station lat lon
drop if mi(lat) & mi(lon)
duplicates drop station, force
gen id = 1
rename (lat lon) (lat_ lon_)
reshape wide lat_ lon_, i(id) j(station) string
merge 1:m id using `school', nogen
drop id
order uniqueID schoolname latitude longitude

// calculate distance between each wind station and each school
foreach stid of local stationID {
	//nearstat latitude longitude, near(lat_`stid' lon_`stid') distvar(dist_`stid') r(3958.761) // too slow
	geodist latitude longitude lat_`stid' lon_`stid', gen(dist_`stid') miles sphere
	}
	
// sort distances in ascending order
rowsort dist_*, gen(dist1-dist`=nwind') 
drop dist2-dist`=nwind'
// identify closest wind stations within `prefdist' miles of school
forval vv = 1/1 { // we want closest wind montior, otherwise use `=nwind' {
	replace dist`vv' = . if dist`vv'>prefdist // exclude wind sites that are further than "prefdist" miles, prefdist specified at the very top 
	gen windsite`vv' = ""
	gen windlat`vv' = .
	gen windlon`vv' = .
	foreach ww of local stationID {
		replace windsite`vv' = "`ww'" if abs(dist`vv' - dist_`ww')<0.0001
		replace windlat`vv' = lat_`ww' if abs(dist`vv' - dist_`ww')<0.0001
		replace windlon`vv' = lon_`ww' if abs(dist`vv' - dist_`ww')<0.0001
	}
}
dropmiss *, force // drops distance variables that have all missing observations
drop dist_* lat_* lon_*
drop if mi(windsite1)
order uniqueID ncessch schoolname dist* windsite*
tempfile nearest
save `nearest', replace
save "`main'\roadsetup\samples\school_wind_5nearest", replace	// saved mostly for troubleshooting EDITED BY JAH 5/14

********************************************************************************
// 3. Merge school-station to wind direction data
// First expand schools to have a full year 8am-3pm observations, then loop to merge
********************************************************************************
// to speed up the process, expand only between 8am and 3pm. 
gen date = date("`yy'/01/01", "YMD") if _n==1
replace date = date("`yy'/12/31", "YMD") if _n==_N
format date %td
carryforward date, replace

xtset uniqueID date
tsfill, full  // for each school, generate dates from jan 1 to dec 31
bysort uniqueID: carryforward ncessch schoolname *1 *lat* *lon*, replace
gsort uniqueID -date // take care of the last school on the list
by uniqueID: carryforward ncessch schoolname *1 *lat* *lon*, replace
sort uniqueID date
expand 2 // duplicates the dataset
bysort uniqueID date: gen id = _n
gen hh = hms(8,00,00) if id == 1
replace hh = hms(15,00,00) if id==2
gen double datetime = date*24*60*60*1000 + hh
format datetime %tcNN/DD/CCYY_HH:MM
egen panelvar = group(uniqueID date)
xtset panelvar datetime, delta(1 hour)
tsfill	// expand each day for each school to have 8am-3pm observations
ds panelvar, not
bysort panelvar: carryforward `r(varlist)', replace
drop date hh id panelvar
gen station = windsite1
tempfile schfill
save `schfill', replace
// merge with wind direction data
use "`main'\School wind pollution\School wind road\winddir_dropmonitor", clear // EDITED BY JAH 5/14
merge 1:m station datetime using `schfill', nogen
drop if mi(uniqueID)
order datetime uniqueID ncessch schoolname dist1 windsite1 circmean windvalue im_circ* im_wind* *lat* *lon*
sort uniqueID datetime
rename (latitude longitude) (schlat schlon)

save "`main'\roadsetup\samples\schwind`=prefdist'mi_schroad`=mitoroad'mi_`sample'_5nearest.dta", replace  // EDITED BY DES 5/17

}


